package com.tool_stdy.Spider;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URL;
import java.net.URLConnection;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Collections;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Collects e-mail addresses posted in a Baidu Tieba thread.
 *
 * <p>Usage is two-phase: {@link #getUrls(String)} scans one page of the thread for
 * pagination links and remembers them, then {@link #getContent(String)} is called for
 * each remembered page to harvest everything that looks like an e-mail address.
 * Results accumulate in static state and can be read back through
 * {@link #collectedUrls()} and {@link #collectedEmails()}.
 *
 * <p>The class keeps plain static collections and performs no synchronization, so it is
 * intended for single-threaded use only.
 *
 * @author yan_li
 */
public class NetSpider1 {

	/** Scheme and host prepended to the relative pagination links found in the HTML. */
	private static final String TIEBA_HOST = "http://tieba.baidu.com";

	/**
	 * Relative pagination link of the crawled thread. {@code \d+} (not {@code \d}) so that
	 * page numbers with two or more digits are captured in full instead of being cut
	 * down to their first digit.
	 */
	private static final Pattern PAGE_LINK = Pattern.compile("/p/4222157139\\?pn=\\d+");

	/** Loose e-mail shape: word characters, {@code @}, then a dotted domain. */
	private static final Pattern EMAIL = Pattern.compile("\\w+@\\w+(\\.\\w+)+");

	/** Connect/read timeout so a stalled server cannot hang the crawl forever. */
	private static final int TIMEOUT_MILLIS = 10000;

	/** Every e-mail match found so far, in discovery order (duplicates are kept). */
	private static final List<String> emails = new ArrayList<String>();

	/**
	 * Absolute page URLs found so far. An insertion-ordered set, because the same
	 * pagination link can occur more than once in a page and each page should be
	 * fetched only once.
	 */
	private static final Set<String> urlList = new LinkedHashSet<String>();

	/**
	 * Downloads {@code beginUrl} and records every pagination link of the thread found
	 * in it as an absolute URL. Links already recorded are ignored.
	 *
	 * @param beginUrl address of a page of the thread to scan for pagination links
	 * @throws Exception if the address is malformed or the page cannot be read
	 */
	public static void getUrls(String beginUrl) throws Exception {
		try (BufferedReader reader = openReader(beginUrl)) {
			String line;
			while ((line = reader.readLine()) != null) {
				Matcher link = PAGE_LINK.matcher(line);
				while (link.find()) {
					urlList.add(TIEBA_HOST + link.group());
				}
			}
		}
	}

	/**
	 * Downloads {@code str_url} and appends every e-mail-like token found in it to the
	 * collected e-mail list.
	 *
	 * @param str_url address of the page to harvest
	 * @throws Exception if the address is malformed or the page cannot be read
	 */
	public static void getContent(String str_url) throws Exception {
		try (BufferedReader reader = openReader(str_url)) {
			String line;
			while ((line = reader.readLine()) != null) {
				Matcher mail = EMAIL.matcher(line);
				while (mail.find()) {
					emails.add(mail.group());
				}
			}
		}
	}

	/**
	 * Returns a read-only snapshot of the page URLs recorded by {@link #getUrls(String)}.
	 *
	 * @return the recorded page URLs in discovery order, without duplicates
	 */
	public static List<String> collectedUrls() {
		return Collections.unmodifiableList(new ArrayList<String>(urlList));
	}

	/**
	 * Returns a read-only snapshot of the e-mails recorded by {@link #getContent(String)}.
	 *
	 * @return the recorded e-mail matches in discovery order
	 */
	public static List<String> collectedEmails() {
		return Collections.unmodifiableList(new ArrayList<String>(emails));
	}

	/**
	 * Opens a buffered, line-oriented reader on the given address. The caller owns the
	 * reader and must close it; closing it also closes the underlying stream.
	 */
	private static BufferedReader openReader(String address) throws IOException {
		URLConnection connection = new URL(address).openConnection();
		connection.setConnectTimeout(TIMEOUT_MILLIS);
		connection.setReadTimeout(TIMEOUT_MILLIS);
		InputStream in = connection.getInputStream();
		// Decode with a fixed charset instead of the platform default so results do not
		// vary between machines. NOTE(review): assumes the pages are served as UTF-8 - confirm.
		return new BufferedReader(new InputStreamReader(in, StandardCharsets.UTF_8));
	}

	/**
	 * Crawls the hard-coded thread: collects the pagination links from its second page,
	 * harvests each linked page, then prints all e-mails found.
	 *
	 * @param args ignored
	 * @throws Exception if any page cannot be fetched
	 */
	public static void main(String[] args) throws Exception {
		String urlBegin = "http://tieba.baidu.com/p/4222157139?pn=2";
		getUrls(urlBegin);
		for (String url : urlList) {
			getContent(url);
			System.out.println("==========" + url);
		}
		System.out.println("***********************************");
		for (String mail : emails) {
			System.out.println(mail);
		}
	}
}
